\contentsline {chapter}{\numberline {1}What is Information extraction}{6}{chapter.1}
\contentsline {section}{\numberline {1.1}Basics}{6}{section.1.1}
\contentsline {section}{\numberline {1.2}Motivation}{6}{section.1.2}
\contentsline {section}{\numberline {1.3}Analysis of IE results and their use in this thesis}{7}{section.1.3}
\contentsline {section}{\numberline {1.4}What is information}{9}{section.1.4}
\contentsline {subsection}{\numberline {1.4.1}Named entity recognition and relation extraction}{9}{subsection.1.4.1}
\contentsline {subsection}{\numberline {1.4.2}Co-reference resolution}{10}{subsection.1.4.2}
\contentsline {subsection}{\numberline {1.4.3}Semi-structured IE}{10}{subsection.1.4.3}
\contentsline {subsection}{\numberline {1.4.4}Language and vocabulary analysis}{11}{subsection.1.4.4}
\contentsline {subsection}{\numberline {1.4.5}Summary}{11}{subsection.1.4.5}
\contentsline {chapter}{\numberline {2}Description of the provided documents}{12}{chapter.2}
\contentsline {section}{\numberline {2.1}Documents}{12}{section.2.1}
\contentsline {subsection}{\numberline {2.1.1}Field of experience summary}{12}{subsection.2.1.1}
\contentsline {subsection}{\numberline {2.1.2}Experience records}{12}{subsection.2.1.2}
\contentsline {subsection}{\numberline {2.1.3}Expert's profiles}{12}{subsection.2.1.3}
\contentsline {subsection}{\numberline {2.1.4}Substitution program}{13}{subsection.2.1.4}
\contentsline {subsection}{\numberline {2.1.5}Knowledge management concept}{13}{subsection.2.1.5}
\contentsline {subsection}{\numberline {2.1.6}Forms}{13}{subsection.2.1.6}
\contentsline {subsection}{\numberline {2.1.7}Yearly reports}{13}{subsection.2.1.7}
\contentsline {subsection}{\numberline {2.1.8}General presentations}{13}{subsection.2.1.8}
\contentsline {subsection}{\numberline {2.1.9}Reports}{13}{subsection.2.1.9}
\contentsline {subsection}{\numberline {2.1.10}Experience records from KM}{13}{subsection.2.1.10}
\contentsline {subsection}{\numberline {2.1.11}Management documentation}{14}{subsection.2.1.11}
\contentsline {subsection}{\numberline {2.1.12}General enterprise documentation}{14}{subsection.2.1.12}
\contentsline {subsection}{\numberline {2.1.13}Supplier's training}{14}{subsection.2.1.13}
\contentsline {section}{\numberline {2.2}Summary}{14}{section.2.2}
\contentsline {chapter}{\numberline {3}Natural language processing}{15}{chapter.3}
\contentsline {section}{\numberline {3.1}Motivation}{15}{section.3.1}
\contentsline {section}{\numberline {3.2}Analysis of Natural language text processing}{16}{section.3.2}
\contentsline {paragraph}{}{16}{section*.6}
\contentsline {section}{\numberline {3.3}NLP tool Treex}{16}{section.3.3}
\contentsline {subsection}{\numberline {3.3.1}Running Treex}{18}{subsection.3.3.1}
\contentsline {section}{\numberline {3.4}What carries information in a sentence}{20}{section.3.4}
\contentsline {subsection}{\numberline {3.4.1}Simple sentence containing verb}{20}{subsection.3.4.1}
\contentsline {subsection}{\numberline {3.4.2}Ellipsis}{21}{subsection.3.4.2}
\contentsline {subsection}{\numberline {3.4.3}Coordination}{21}{subsection.3.4.3}
\contentsline {subsection}{\numberline {3.4.4}Parenthesis}{21}{subsection.3.4.4}
\contentsline {subsection}{\numberline {3.4.5}Complex phenomena}{21}{subsection.3.4.5}
\contentsline {chapter}{\numberline {4}Information Extraction}{22}{chapter.4}
\contentsline {section}{\numberline {4.1}Dependency patterns}{22}{section.4.1}
\contentsline {subsection}{\numberline {4.1.1}Predicate-Argument model (SVO)}{23}{subsection.4.1.1}
\contentsline {subsection}{\numberline {4.1.2}Chain model}{23}{subsection.4.1.2}
\contentsline {subsection}{\numberline {4.1.3}Linked Chain model}{23}{subsection.4.1.3}
\contentsline {subsection}{\numberline {4.1.4}Unconstrained Linked Chain model (ULC)}{23}{subsection.4.1.4}
\contentsline {subsection}{\numberline {4.1.5}Shortest Path model}{24}{subsection.4.1.5}
\contentsline {subsection}{\numberline {4.1.6}Subtree model}{24}{subsection.4.1.6}
\contentsline {subsection}{\numberline {4.1.7}Patterns comparison}{24}{subsection.4.1.7}
\contentsline {section}{\numberline {4.2}Analysis of patterns and possible solutions}{25}{section.4.2}
\contentsline {subsection}{\numberline {4.2.1}Named entity detection and relation extraction}{26}{subsection.4.2.1}
\contentsline {subsection}{\numberline {4.2.2}Co-reference resolution}{26}{subsection.4.2.2}
\contentsline {subsection}{\numberline {4.2.3}Coordination}{27}{subsection.4.2.3}
\contentsline {subsection}{\numberline {4.2.4}Terminology extraction}{27}{subsection.4.2.4}
\contentsline {section}{\numberline {4.3}Summary}{28}{section.4.3}
\contentsline {chapter}{\numberline {5}Describing and storing the information}{29}{chapter.5}
\contentsline {section}{\numberline {5.1}How to describe information}{29}{section.5.1}
\contentsline {subsection}{\numberline {5.1.1}RDF definition}{30}{subsection.5.1.1}
\contentsline {subsection}{\numberline {5.1.2}RDF Schema definition}{31}{subsection.5.1.2}
\contentsline {subsection}{\numberline {5.1.3}How to code extracted information}{31}{subsection.5.1.3}
\contentsline {section}{\numberline {5.2}Modeling our information}{31}{section.5.2}
\contentsline {subsection}{\numberline {5.2.1}Representing a subject, predicate, object}{32}{subsection.5.2.1}
\contentsline {subsection}{\numberline {5.2.2}Terminology representation}{32}{subsection.5.2.2}
\contentsline {subsection}{\numberline {5.2.3}Document representation}{33}{subsection.5.2.3}
\contentsline {section}{\numberline {5.3}Searching for a proper database}{33}{section.5.3}
\contentsline {section}{\numberline {5.4}Summary}{35}{section.5.4}
\contentsline {chapter}{\numberline {6}Search Engine}{36}{chapter.6}
\contentsline {section}{\numberline {6.1}Available information}{36}{section.6.1}
\contentsline {section}{\numberline {6.2}Search specifications and possible solutions}{36}{section.6.2}
\contentsline {subsection}{\numberline {6.2.1}Search based on triples match}{37}{subsection.6.2.1}
\contentsline {subsection}{\numberline {6.2.2}Search based on terminology match}{37}{subsection.6.2.2}
\contentsline {subsection}{\numberline {6.2.3}Relevance of the terminology}{38}{subsection.6.2.3}
\contentsline {subsection}{\numberline {6.2.4}Non-document search specification}{39}{subsection.6.2.4}
\contentsline {section}{\numberline {6.3}SPARQL Definition}{39}{section.6.3}
\contentsline {section}{\numberline {6.4}Conclusion}{40}{section.6.4}
\contentsline {chapter}{\numberline {7}Implementation}{41}{chapter.7}
\contentsline {section}{\numberline {7.1}Requirements}{41}{section.7.1}
\contentsline {section}{\numberline {7.2}Architecture}{42}{section.7.2}
\contentsline {section}{\numberline {7.3}Architecture in details}{42}{section.7.3}
\contentsline {subsection}{\numberline {7.3.1}Model}{42}{subsection.7.3.1}
\contentsline {subsection}{\numberline {7.3.2}View and Controller}{44}{subsection.7.3.2}
\contentsline {chapter}{\numberline {8}Implementation of IE}{45}{chapter.8}
\contentsline {section}{\numberline {8.1}Plain Text Extraction}{45}{section.8.1}
\contentsline {subsection}{\numberline {8.1.1}Implementation}{45}{subsection.8.1.1}
\contentsline {subsection}{\numberline {8.1.2}Observation}{46}{subsection.8.1.2}
\contentsline {section}{\numberline {8.2}Linguistic processing}{47}{section.8.2}
\contentsline {subsection}{\numberline {8.2.1}Implementation}{47}{subsection.8.2.1}
\contentsline {section}{\numberline {8.3}Observation}{48}{section.8.3}
\contentsline {section}{\numberline {8.4}Document Tree Structure}{48}{section.8.4}
\contentsline {subsection}{\numberline {8.4.1}Implementation}{48}{subsection.8.4.1}
\contentsline {section}{\numberline {8.5}IE implementation}{49}{section.8.5}
\contentsline {subsection}{\numberline {8.5.1}Search rules and search process}{49}{subsection.8.5.1}
\contentsline {subsection}{\numberline {8.5.2}Writing search configuration}{51}{subsection.8.5.2}
\contentsline {chapter}{\numberline {9}Database Implementation}{52}{chapter.9}
\contentsline {section}{\numberline {9.1}Ontology}{52}{section.9.1}
\contentsline {section}{\numberline {9.2}Working with Virtuoso database}{53}{section.9.2}
\contentsline {section}{\numberline {9.3}Converting and storing extracted information into a graph}{53}{section.9.3}
\contentsline {section}{\numberline {9.4}Implementation}{54}{section.9.4}
\contentsline {chapter}{\numberline {10}Search Implementation}{55}{chapter.10}
\contentsline {section}{\numberline {10.1}Search requirements}{55}{section.10.1}
\contentsline {section}{\numberline {10.2}Implementation}{55}{section.10.2}
\contentsline {subsection}{\numberline {10.2.1}Search for triples}{56}{subsection.10.2.1}
\contentsline {subsection}{\numberline {10.2.2}Search for resource}{57}{subsection.10.2.2}
\contentsline {subsection}{\numberline {10.2.3}Document search based on a triple}{57}{subsection.10.2.3}
\contentsline {subsection}{\numberline {10.2.4}Document search based on a terminology}{57}{subsection.10.2.4}
\contentsline {subsection}{\numberline {10.2.5}Document search based on a triple and a terminology}{58}{subsection.10.2.5}
\contentsline {chapter}{\numberline {11}User Interface}{59}{chapter.11}
\contentsline {section}{\numberline {11.1}Overview}{59}{section.11.1}
\contentsline {section}{\numberline {11.2}Triples and terminologies}{59}{section.11.2}
\contentsline {section}{\numberline {11.3}Processing documents}{60}{section.11.3}
\contentsline {section}{\numberline {11.4}Examples}{60}{section.11.4}
\contentsline {chapter}{\numberline {12}Results evaluation}{63}{chapter.12}
\contentsline {section}{\numberline {12.1}Results of the thesis}{63}{section.12.1}
\contentsline {section}{\numberline {12.2}Summary}{65}{section.12.2}
\contentsline {chapter}{Conclusion}{66}{chapter*.7}
\contentsline {chapter}{Bibliography}{67}{chapter*.8}
\contentsline {chapter}{List of Tables}{71}{chapter*.9}
\contentsline {chapter}{List of Figures}{72}{chapter*.10}
\contentsline {chapter}{List of Abbreviations}{73}{chapter*.11}
\contentsline {chapter}{\numberline {A}Appendix - Treex Installation}{74}{Appendix.a.A}
\contentsline {paragraph}{Missing packages, modules:}{74}{section*.12}
\contentsline {chapter}{\numberline {B}Appendix - Ontology}{75}{Appendix.b.B}
\contentsline {chapter}{\numberline {C}Appendix - Search configuration example}{77}{Appendix.c.C}
